1 Load Packages

if (!require("dplyr")) {install.packages("dplyr"); require("dplyr")}
if (!require("Seurat")) {install.packages("Seurat"); require("Seurat")}
if (!require("patchwork")) {install.packages("patchwork"); require("patchwork")}
if (!require("knitr")) {install.packages("knitr"); require("knitr")}
if (!require("ggplot2")) {install.packages("ggplot2"); require("ggplot2")}
if (!require("BiocManager")) {install.packages("BiocManager"); require("BiocManager")}
if (!require("tibble")) {install.packages("tibble"); require("tibble")}
if (!require("ggpmisc")) {install.packages("ggpmisc"); require("ggpmisc")}
if (!require("RColorBrewer")) {install.packages("RColorBrewer"); require("RColorBrewer")} #color
if (!require("ggrepel")) {install.packages("ggrepel"); require("ggrepel")}
if (!require("DESeq2")) {BiocManager::install('DESeq2'); require("DESeq2")}
if (!require("here")) {install.packages("here"); require("here")}
if (!require("stringr")) {install.packages("stringr"); require("stringr")}
if (!require("car")) {install.packages("car"); require("car")}
if (!require("openxlsx")) {install.packages("openxlsx"); require("openxlsx")}
if (!require("readxl")) {install.packages("readxl"); require("readxl")}
if (!require("data.table")) {install.packages("data.table"); require("data.table")}
if (!require("ggvenn")) {install.packages("ggvenn"); require("ggvenn")}
if (!require("kableExtra")) {install.packages("kableExtra"); require("kableExtra")} # for color brewer
if (!require("gplots")) {install.packages("gplots"); require("gplots")} # for color brewer
if (!require("clusterProfiler")) {BiocManager::install('clusterProfiler'); require("clusterProfiler")}
if (!require("enrichplot")) {BiocManager::install('enrichplot'); require("enrichplot")}
if (!require("tidyverse")) {install.packages("tidyverse"); require("tidyverse")} # for data frame transformation
if (!require("plotly")) {install.packages("plotly"); require("plotly")}
library("EnhancedVolcano")

# Install from scratch
if (!require("BiocManager", quietly = TRUE))
    install.packages("BiocManager")

BiocManager::install("RSQLite")  # Core dependency
BiocManager::install("org.Mm.eg.db")
BiocManager::install("clusterProfiler")


library(org.Mm.eg.db)
library(clusterProfiler)
library(AnnotationDbi)
library(dplyr)
library(magrittr)




library("EnhancedVolcano")
library(UpSetR)
options(future.globals.maxSize = 74 * 1024^3) # 55 GB
getOption("future.globals.maxSize") #59055800320
## [1] 79456894976

1.1 Load Dataset

load(here("jk_code", "macula_densa.rds"))

2 Final UMAP

UMAP plot summarizing the final clustering analysis of the dataset. Clusters were identified using the top 15 principal components and resolution 1 following SCTransform normalization. The UMAP visualization displays the separation of distinct cell populations.

seu <- SCTransform(seu) %>%
    RunPCA() %>%
    FindNeighbors(dims = 1:15) %>%
    FindClusters(resolution = 1) %>%
    RunUMAP(dims = 1:15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 11384
## Number of edges: 353418
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7665
## Number of communities: 17
## Elapsed time: 1 seconds
DimPlot(seu)

2.1 Confirmation of Macula Densa Markers

Every cluster here represents the macula densa
Markers are from a known list used to identify kidney cell types

DotPlot(seu,
features = markers.to.plot1,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()

3 FindMarkers of Clusters

Cluster markers were identified for each group to characterize cellular diversity within the dataset.The accompanying heatmap highlights the top three genes per cluster, ranked by average log2 fold change (avg_log2FC) and filtered for statistical significance (p_val <0.05). This analysis revealed expression patterns, such as distinct Egf and Umod expression in select clusters and very mild differential expression among the first five clusters, supporting the classification of macula densa subtypes. Additionally, selected genes—including Ptger3, Pappa2, Egf, Foxq1, Fos, Egr1, and Cxcl10—were visualized to further illustrate key differences and marker distributions across clusters.

all_markers <- FindAllMarkers(seu, only.pos = TRUE, min.pct = 0.5, logfc.threshold = 0.5)

all_markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 0.5, p_val_adj < 0.05) %>%
    slice_head(n = 3) %>%
    ungroup() -> top3

DoHeatmap(seu, features = top3$gene) + NoLegend()

markers.to.plot3 <- c("Ptger3","Pappa2","Egf","Foxq1","Fos","Egr1","Cxcl10")

3.1 Grouping into Macula Densa subtypes

The seurat cluster order was changed to visually show similarities between clusters on a multidimensional dotplot

The Dotplot shows 4 patterns in macula densa. There were clusters highly expressing Pappa2,Egf,Fos,and Cxcl10. These became the hallmark markers for identifying my macula densa types. Within each type identification besides Cxcl10, Theres unique expressions of a few specific genes.

Type 1 Clusters are a bit difficult but some markers that could help identify are Ptger3 and Pappa2.

Type 2 Expresses high Egf, with its subcluster expressing high Foxq1 (there are more but this is an example)

Type 3 Expresses high in Fos, it’s subcluster shows high expression of Egr1.

seu@meta.data$seurat_clusters <- 
  factor(
    seu@meta.data$seurat_clusters, 
    levels = c(0, 3, 1, 2, 4, 7, 12, 13,5,9, 6, 16, 10, 8, 11, 14, 15)
  )



Idents(seu) <- "seurat_clusters"

DotPlot(seu,
features = markers.to.plot3,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()

seu@meta.data <- seu@meta.data %>%
  mutate(
    md_subtype = dplyr::case_when(
      seurat_clusters %in% c(0,3) ~ "type_1a",
      seurat_clusters %in% c(7) ~ "type_1b",
      seurat_clusters %in% c(1,2,4,12,13,5) ~ "type_1c",
     seurat_clusters %in% c(9,6,16,10) ~ "type_2a",
     seurat_clusters %in% c(8) ~ "type_2b",
      seurat_clusters %in% c(11) ~ "type_3a",
      seurat_clusters %in% c(14) ~ "type_3b",
      seurat_clusters == 15 ~ "type_4"
    )
  )


DimPlot(object = seu, reduction = "umap", group.by = "md_subtype", label = TRUE)

3.2 Grouping into Macula Densa Types

UMAP visualization showing the macula densa cell populations grouped into four major types, based on re-annotation using aggregated md_subtype classifications. Cells originally overclustered into subtypes (e.g., type_1a, type_1b, type_1c) were merged into broader macula densa types (type_1 through type_4) according to shared DEG markers.

seu@meta.data <- seu@meta.data %>%
  mutate(
    md_type = case_when(
      md_subtype %in% c("type_1a", "type_1b", "type_1c") ~ "type_1",
      md_subtype %in% c("type_2a", "type_2b") ~ "type_2",
      md_subtype %in% c("type_3a", "type_3b") ~ "type_3",
      md_subtype == "type_4" ~ "type_4",
    )
  )


DimPlot(object = seu, reduction = "umap", group.by = "md_type", label = TRUE)

3.3 FeaturePlot of Types

This shows our four distinct clusters

FeaturePlot(seu,c("Pappa2","Egf","Foxq1","Fos","Egr1","Cxcl10"))

3.4 DotPlots for Type Visualization

Diagonal Dotplot of Macula Densa Subtype Markers (more specific)

seu@meta.data$md_subtype <- 
  factor(seu@meta.data$md_subtype, 
         levels = c("type_1a","type_1b","type_1c","type_2a","type_2b", "type_3a", "type_3b", "type_4"))


Idents(seu) <- "md_subtype"

DotPlot(seu,
features = markers.to.plot3,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()

Diagonal Dotplot of Macula Densa Type Markers

Idents(seu) <- "md_type"


markers.to.plot4 <- c("Pappa2","Robo2","Egf","Umod","Jun","Fos","Cxcl10","Isg15")

DotPlot(seu,
features = markers.to.plot4,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()

4 Excel Files

Google Drive Link (Contains Excel Files)

4.1 Markers by Cluster

Some clusters reveal rare but biologically informative distinctions, which can highlight subtle cell states or activation.
For example, S100g is a marker highly expressed specifically in cluster 12, but it exhibits strong sample-specific variation, notably being much higher in sample SO4 compared to other samples.
This pattern suggests that while S100g is a useful marker for identifying this cluster, its expression may be influenced by batch effects or sample-specific conditions and should be interpreted with caution.

VlnPlot(seu,"S100g",group.by = "treatment",split.by = "sample")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

Idents(seu) <- "seurat_clusters"

DoHeatmap(seu, features = top3$gene) + NoLegend()

all_markers <- all_markers %>%
  filter(p_val_adj != 1) %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, everything())

# Split by cluster (ident column)

marker_list <- split(all_markers, all_markers$cluster)

 

# Sort names alphanumerically

sorted_cluster_names <- names(marker_list)[order(as.numeric(names(marker_list)))]

 

# Create a workbook

wb <- createWorkbook()

 

# Add each cluster as a new worksheet

for (cluster_name in sorted_cluster_names) {

  addWorksheet(wb, sheetName = cluster_name)

  writeData(wb, sheet = cluster_name, marker_list[[cluster_name]])

}

 

date <- format(Sys.Date(), "%Y%m%d")

 

# Save workbook

saveWorkbook(wb, here("jk_code", paste0(date, "_", "_SO4_FindAllMarkers_By_Cluster.xlsx")), overwrite = TRUE)

4.2 Markers by Subtype

Marker genes defined by subtypes tend to be more robust and consistent, reliably distinguishing functional or transcriptional subpopulations within the macula densa.

These subtype markers are less likely to be confounded by technical artifacts and better represent genuine biological variation related to cell states or functions.

These subtypes are consistent with changes in principal components.

Idents(seu) <- "md_subtype"

subtype_markers <- FindAllMarkers(seu, 
                          only.pos = TRUE,
                          logfc.threshold = 0.1,
                          min.pct = 0.1,
                          min.diff.pct = 0.05,
                          return.thresh = 0.05)

subtype_markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1, p_val_adj < 0.05) %>%
    slice_head(n = 3) %>%
    ungroup() -> top3subtype

DoHeatmap(seu, features = top3subtype$gene) + NoLegend()

# Removing  insignificant genes
subtype_markers <- subtype_markers %>%
  filter(p_val_adj != 1) %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, everything())

# Split by cluster (ident column)

marker_list <- split(subtype_markers, subtype_markers$cluster)

 
# Sort names alphanumerically

sorted_cluster_names <- names(marker_list)[order(as.numeric(names(marker_list)))]


# Create a workbook

wb <- createWorkbook()

 

# Add each cluster as a new worksheet

for (cluster_name in sorted_cluster_names) {

  addWorksheet(wb, sheetName = cluster_name)

  writeData(wb, sheet = cluster_name, marker_list[[cluster_name]])

}

date <- format(Sys.Date(), "%Y%m%d")


# Save workbook

saveWorkbook(wb, here("jk_code", paste0(date, "_", "_SO4_FindAllMarkers_By_Subtype.xlsx")), overwrite = TRUE)

4.3 Markers by MD Type

At the broader MD type level, certain markers allow clear differentiation between major macula densa subtypes.

These general markers serve as reliable signatures to classify each MD type, providing consistent identification of the main macula densa categories in this dataset.

Idents(seu) <- "md_type"

type_markers <- FindAllMarkers(seu, 
                          only.pos = TRUE,
                          logfc.threshold = 0.1,
                          min.pct = 0.1,
                          min.diff.pct = 0.05,
                          return.thresh = 0.05)

type_markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1, p_val_adj < 0.05) %>%
    slice_head(n = 3) %>%
    ungroup() -> top3type

DoHeatmap(seu, features = top3type$gene) + NoLegend()

type_markers <- type_markers %>%
  filter(p_val_adj != 1) %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, everything())

# Split by cluster (ident column)

marker_list <- split(type_markers, type_markers$cluster)

 
# Sort names alphanumerically

sorted_cluster_names <- names(marker_list)[order(as.numeric(names(marker_list)))]


# Create a workbook

wb <- createWorkbook()

 

# Add each cluster as a new worksheet

for (cluster_name in sorted_cluster_names) {

  addWorksheet(wb, sheetName = cluster_name)

  writeData(wb, sheet = cluster_name, marker_list[[cluster_name]])

}

date <- format(Sys.Date(), "%Y%m%d")


# Save workbook

saveWorkbook(wb, here("jk_code", paste0(date, "_", "_SO4_FindAllMarkers_By_MDtype.xlsx")), overwrite = TRUE)

5 Pathway Analysis

Link Includes the Marker DEGs as well as the Gene Ontology of each Macula Densa Type and Subtype.

Google Drive Link (Contains Excel Files)

5.1 Creating Pathway Function

#  Filter Before Independently before you Run this

# Creating a function to Repeat Easily

pathway_analysis <- function(type_markers, type_name)
  {
  # Arrange markers by log2 fold change
  df2 <- type_markers %>% arrange(desc(avg_log2FC))
  DEG_list <- df2
  
  markers <- DEG_list %>% mutate(SYMBOL = gene)


ENTREZ_list <- bitr(
  geneID = DEG_list$gene,
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = org.Mm.eg.db
)

  markers <- ENTREZ_list %>% inner_join(markers, by = "SYMBOL")
  
  # Upregulated Pathway Code
  pos_markers_up <- markers %>% dplyr::filter(avg_log2FC > 0) %>% arrange(desc(abs(avg_log2FC)))
  pos_ranks_up <- pos_markers_up$ENTREZID[abs(pos_markers_up$avg_log2FC) > 0]
  pos_go_up <- enrichGO(gene = pos_ranks_up, OrgDb = "org.Mm.eg.db", ont = "BP", readable = TRUE)
  chart_up <- dotplot(pos_go_up) +
    ggtitle(paste(type_name, "Functions")) +
    theme_classic() +
    theme(
      plot.title = element_text(hjust = 0.5),
      legend.position = "left",
      axis.text.y = element_text(hjust = 0, size = 10)
    ) +
    scale_y_discrete(position = "right", labels = function(x) str_wrap(x, width = 25))
  

  return(list(
    chart_up = chart_up,
    pos_go_up = pos_go_up
  ))
}

5.2 Pathways for Each MD Type

# Creating List of Marker genes to do Pathway Analysis
type1_markers <- type_markers[type_markers$cluster == "type_1", ]
type2_markers <- type_markers[type_markers$cluster == "type_2", ]
type3_markers <- type_markers[type_markers$cluster == "type_3", ]
type4_markers <- type_markers[type_markers$cluster == "type_4", ]

# Creating a Pos Go and Chart using PA function
results_type1 <- pathway_analysis(subset(type1_markers, avg_log2FC > 0.4), "type 1")
results_type2 <- pathway_analysis(subset(type2_markers, avg_log2FC > 0.72), "type 2")
results_type3 <- pathway_analysis(subset(type3_markers, avg_log2FC > 0), "type 3")
results_type4 <- pathway_analysis(subset(type4_markers, avg_log2FC > 0), "type 4")

5.2.0.1 Type 1

results_type1
## $chart_up

## 
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:324] "19283" "380785" "239435" "23850" "210801" "320685" "72309" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...394 enriched terms found
## 'data.frame':    394 obs. of  12 variables:
##  $ ID            : chr  "GO:0048705" "GO:0001822" "GO:0072001" "GO:0003002" ...
##  $ Description   : chr  "skeletal system morphogenesis" "kidney development" "renal system development" "regionalization" ...
##  $ GeneRatio     : chr  "18/314" "20/314" "20/314" "21/314" ...
##  $ BgRatio       : chr  "269/28928" "375/28928" "390/28928" "454/28928" ...
##  $ RichFactor    : num  0.0669 0.0533 0.0513 0.0463 0.1 ...
##  $ FoldEnrichment: num  6.16 4.91 4.72 4.26 9.21 ...
##  $ zScore        : num  8.91 7.99 7.76 7.34 9.04 ...
##  $ pvalue        : num  1.10e-09 6.52e-09 1.26e-08 3.22e-08 3.40e-08 ...
##  $ p.adjust      : num  4.11e-06 1.22e-05 1.57e-05 2.26e-05 2.26e-05 ...
##  $ qvalue        : num  3.09e-06 9.17e-06 1.18e-05 1.70e-05 1.70e-05 ...
##  $ geneID        : chr  "Pappa2/Frem1/Mmp14/Zfand5/Hoxb9/Hoxb3/Enpp1/Hoxc8/Hoxb7/Hoxc4/Hoxd11/Flvcr1/Hoxd4/Hoxb4/Zfp950/Vdr/Hoxc9/Hoxb5" "Gfra1/Bmp2/Bmper/Cdkn1c/Robo2/Cited1/Ass1/Col4a3/Irx1/Nrp1/Col4a4/Enpp1/Bcl2/Hoxb7/Pax8/Ptk7/Gdf11/Hoxd11/Irx2/Zfp950" "Gfra1/Bmp2/Bmper/Cdkn1c/Robo2/Cited1/Ass1/Col4a3/Irx1/Nrp1/Col4a4/Enpp1/Bcl2/Hoxb7/Pax8/Ptk7/Gdf11/Hoxd11/Irx2/Zfp950" "Bmp2/Robo2/Irx1/Hoxb9/Nrp1/Hoxb3/Hoxc8/Hoxb7/Pax8/Hoxc4/Gdf11/Hoxd11/Irx2/Hoxd4/Hoxb4/Hipk2/Arl13b/Ppp2r3a/Tasor/Hoxc9/Hoxb5" ...
##  $ Count         : int  18 20 20 21 11 15 12 15 18 17 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722

5.2.0.2 Type 2

results_type2
## $chart_up

## 
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:217] "330428" "16513" "268379" "53315" "15220" "69824" "13645" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...331 enriched terms found
## 'data.frame':    331 obs. of  12 variables:
##  $ ID            : chr  "GO:0031589" "GO:0015980" "GO:0031638" "GO:0010810" ...
##  $ Description   : chr  "cell-substrate adhesion" "energy derivation by oxidation of organic compounds" "zymogen activation" "regulation of cell-substrate adhesion" ...
##  $ GeneRatio     : chr  "19/211" "15/211" "8/211" "12/211" ...
##  $ BgRatio       : chr  "377/28928" "380/28928" "82/28928" "235/28928" ...
##  $ RichFactor    : num  0.0504 0.0395 0.0976 0.0511 0.039 ...
##  $ FoldEnrichment: num  6.91 5.41 13.38 7 5.35 ...
##  $ zScore        : num  9.9 7.42 9.62 7.92 6.85 ...
##  $ pvalue        : num  5.18e-11 1.49e-07 1.57e-07 1.78e-07 1.16e-06 ...
##  $ p.adjust      : num  1.61e-07 1.38e-04 1.38e-04 1.38e-04 7.20e-04 ...
##  $ qvalue        : num  1.22e-07 1.05e-04 1.05e-04 1.05e-04 5.48e-04 ...
##  $ geneID        : chr  "Ntn4/Tacstd2/Fermt1/Jag1/Ccn1/Kank1/Rhod/Dbn1/Thbs1/Fzd4/Plau/L1cam/Lamb1/Itgb6/Epb41l5/Bcam/Tnfrsf12a/Itgb8/Gas6" "Mrap2/Ppargc1a/Gabarapl1/Idh2/Cycs/Idh3a/Got1/Eva1a/Aco2/Ndufs2/Pfkm/Cs/Trap1/Uqcrfs1/Ppif" "Prss12/Ggt1/Thbs1/Plau/Cyfip2/Eno1/Pgk1/Klk1b9" "Tacstd2/Fermt1/Jag1/Ccn1/Kank1/Rhod/Dbn1/Thbs1/Fzd4/Plau/L1cam/Epb41l5" ...
##  $ Count         : int  19 15 8 12 13 15 8 14 13 11 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722

5.2.0.3 Type 3

results_type3
## $chart_up

## 
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:104] "14282" "13653" "12702" "11910" "14281" "16598" "16477" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...485 enriched terms found
## 'data.frame':    485 obs. of  12 variables:
##  $ ID            : chr  "GO:0010563" "GO:0045936" "GO:0042326" "GO:0001933" ...
##  $ Description   : chr  "negative regulation of phosphorus metabolic process" "negative regulation of phosphate metabolic process" "negative regulation of phosphorylation" "negative regulation of protein phosphorylation" ...
##  $ GeneRatio     : chr  "18/98" "18/98" "17/98" "16/98" ...
##  $ BgRatio       : chr  "388/28928" "388/28928" "337/28928" "316/28928" ...
##  $ RichFactor    : num  0.0464 0.0464 0.0504 0.0506 0.0405 ...
##  $ FoldEnrichment: num  13.7 13.7 14.9 14.9 11.9 ...
##  $ zScore        : num  14.7 14.7 15 14.5 13.2 ...
##  $ pvalue        : num  1.03e-15 1.03e-15 1.74e-15 1.19e-14 6.40e-14 ...
##  $ p.adjust      : num  1.20e-12 1.20e-12 1.35e-12 6.90e-12 2.97e-11 ...
##  $ qvalue        : num  7.60e-13 7.60e-13 8.57e-13 4.38e-12 1.89e-11 ...
##  $ geneID        : chr  "Socs3/Dusp1/Jun/Gadd45b/Ppp1r15a/Irf1/Hspb1/Gadd45g/Errfi1/Trib1/Ddit4/Gadd45a/Rgs2/Irs2/Dnaja1/Taf7/Spry1/Impact" "Socs3/Dusp1/Jun/Gadd45b/Ppp1r15a/Irf1/Hspb1/Gadd45g/Errfi1/Trib1/Ddit4/Gadd45a/Rgs2/Irs2/Dnaja1/Taf7/Spry1/Impact" "Socs3/Dusp1/Jun/Gadd45b/Ppp1r15a/Irf1/Hspb1/Gadd45g/Errfi1/Trib1/Gadd45a/Rgs2/Irs2/Dnaja1/Taf7/Spry1/Impact" "Socs3/Dusp1/Jun/Gadd45b/Ppp1r15a/Irf1/Hspb1/Gadd45g/Errfi1/Trib1/Gadd45a/Rgs2/Dnaja1/Taf7/Spry1/Impact" ...
##  $ Count         : int  18 18 17 16 17 17 13 12 11 15 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722

5.2.0.4 Type 4

results_type4
## $chart_up

## 
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:29] "15945" "15957" "23962" "100038882" "15959" "67775" "16145" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...170 enriched terms found
## 'data.frame':    170 obs. of  12 variables:
##  $ ID            : chr  "GO:0051607" "GO:0009615" "GO:0045088" "GO:0045069" ...
##  $ Description   : chr  "defense response to virus" "response to virus" "regulation of innate immune response" "regulation of viral genome replication" ...
##  $ GeneRatio     : chr  "14/29" "14/29" "11/29" "7/29" ...
##  $ BgRatio       : chr  "329/28928" "412/28928" "496/28928" "100/28928" ...
##  $ RichFactor    : num  0.0426 0.034 0.0222 0.07 0.0631 ...
##  $ FoldEnrichment: num  42.4 33.9 22.1 69.8 62.9 ...
##  $ zScore        : num  24 21.3 15 21.8 20.7 ...
##  $ pvalue        : num  3.05e-20 7.24e-19 8.83e-13 6.99e-12 1.47e-11 ...
##  $ p.adjust      : num  2.25e-17 2.67e-16 2.17e-10 1.29e-09 2.17e-09 ...
##  $ qvalue        : num  1.30e-17 1.54e-16 1.25e-10 7.41e-10 1.25e-09 ...
##  $ geneID        : chr  "Cxcl10/Ifit1/Oasl2/Isg15/Ifit3/Rtp4/Igtp/Gbp7/Irgm1/Stat2/Ifih1/Zc3hav1/Znfx1/Bst2" "Cxcl10/Ifit1/Oasl2/Isg15/Ifit3/Rtp4/Igtp/Gbp7/Irgm1/Stat2/Ifih1/Zc3hav1/Znfx1/Bst2" "Isg15/Igtp/Gbp7/Parp14/Irgm1/Stat2/Ifih1/Zc3hav1/Znfx1/Nfkbia/Grn" "Oasl2/Isg15/Gbp7/Ifih1/Zc3hav1/Znfx1/Bst2" ...
##  $ Count         : int  14 14 11 7 7 6 7 6 7 7 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722

5.2.1 Saving Excel File

pathway_list <- list(
  "type 1" = results_type1$pos_go_up@result,
  "type 2" = results_type2$pos_go_up@result,
  "type 3" = results_type3$pos_go_up@result,
  "type 4" = results_type4$pos_go_up@result
)


wb <- createWorkbook()


for (sheet_name in names(pathway_list)) {
  addWorksheet(wb, sheet_name)
  writeData(wb, sheet_name, pathway_list[[sheet_name]])
}

# Save the workbook
saveWorkbook(wb, here("jk_code", "md_type_pathways_DEGS.xlsx"), overwrite = TRUE)

5.3 Pathway for each MD subtype

# Subset markers by subtype
subtype1a_markers <- subset(subtype_markers, cluster == "type_1a")
subtype1b_markers <- subset(subtype_markers, cluster == "type_1b")
subtype1c_markers <- subset(subtype_markers, cluster == "type_1c")
subtype2a_markers <- subset(subtype_markers, cluster == "type_2a")
subtype2b_markers <- subset(subtype_markers, cluster == "type_2b")
subtype3a_markers <- subset(subtype_markers, cluster == "type_3a")
subtype3b_markers <- subset(subtype_markers, cluster == "type_3b")

# Run pathway analysis for each subtype (adjust thresholds as appropriate)
results_type1a <- pathway_analysis(subset(subtype1a_markers, avg_log2FC > 0.5), "type 1a")
results_type1b <- pathway_analysis(subset(subtype1b_markers, avg_log2FC > 0.6), "type 1b")
results_type1c <- pathway_analysis(subset(subtype1c_markers, avg_log2FC > 0), "type 1c")
results_type2a <- pathway_analysis(subset(subtype2a_markers, avg_log2FC > 1), "type 2a")
results_type2b <- pathway_analysis(subset(subtype2b_markers, avg_log2FC > 2), "type 2b")
results_type3a <- pathway_analysis(subset(subtype3a_markers, avg_log2FC > 0),   "type 3a")
results_type3b <- pathway_analysis(subset(subtype3b_markers, avg_log2FC > 0),   "type 3b")

5.3.0.1 Type 1a

results_type1a
## $chart_up

## 
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:194] "239435" "19283" "23850" "12960" "72309" "320685" "66815" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...47 enriched terms found
## 'data.frame':    47 obs. of  12 variables:
##  $ ID            : chr  "GO:0002526" "GO:0006953" "GO:0141156" "GO:0009636" ...
##  $ Description   : chr  "acute inflammatory response" "acute-phase response" "cAMP/PKA signal transduction" "response to toxic substance" ...
##  $ GeneRatio     : chr  "9/187" "6/187" "4/187" "9/187" ...
##  $ BgRatio       : chr  "122/28928" "51/28928" "25/28928" "234/28928" ...
##  $ RichFactor    : num  0.0738 0.1176 0.16 0.0385 0.1429 ...
##  $ FoldEnrichment: num  11.41 18.2 24.75 5.95 22.1 ...
##  $ zScore        : num  9.3 9.92 9.58 6.13 9.01 ...
##  $ pvalue        : num  1.06e-07 9.52e-07 1.92e-05 2.32e-05 3.07e-05 ...
##  $ p.adjust      : num  0.000276 0.001242 0.010416 0.010416 0.010416 ...
##  $ qvalue        : num  0.000235 0.001056 0.008855 0.008855 0.008855 ...
##  $ geneID        : chr  "Nupr1/C2cd4b/Lbp/Ptgs2/Hfe/Ass1/Ptges/Gstp1/Plscr1" "Lbp/Ptgs2/Hfe/Ass1/Ptges/Plscr1" "Calcr/Ramp3/Sfn/Prkar1b" "Lcn2/Nupr1/Ptgs2/Ass1/Ptges/Slc22a18/Pon2/Gstp1/Cnp" ...
##  $ Count         : int  9 6 4 9 4 4 11 4 4 3 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722

5.3.0.2 Type 1b

results_type1b
## $chart_up

## 
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:216] "380785" "74480" "244682" "103948" "19218" "171531" "54156" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...62 enriched terms found
## 'data.frame':    62 obs. of  12 variables:
##  $ ID            : chr  "GO:0099173" "GO:0031346" "GO:0060996" "GO:0051647" ...
##  $ Description   : chr  "postsynapse organization" "positive regulation of cell projection organization" "dendritic spine development" "nucleus localization" ...
##  $ GeneRatio     : chr  "13/203" "15/203" "8/203" "5/203" ...
##  $ BgRatio       : chr  "314/28928" "471/28928" "138/28928" "44/28928" ...
##  $ RichFactor    : num  0.0414 0.0318 0.058 0.1136 0.0332 ...
##  $ FoldEnrichment: num  5.9 4.54 8.26 16.19 4.74 ...
##  $ zScore        : num  7.34 6.51 7.19 8.48 5.75 ...
##  $ pvalue        : num  3.87e-07 1.36e-06 6.23e-06 1.41e-05 2.44e-05 ...
##  $ p.adjust      : num  0.00113 0.00199 0.00609 0.01032 0.01283 ...
##  $ qvalue        : num  0.000954 0.001674 0.005123 0.008683 0.010798 ...
##  $ geneID        : chr  "Epha4/Flrt2/Myh10/Kif1a/Magi2/Ptprs/Mpp2/Abl2/Prmt3/Sh3gl2/Tanc2/Ephb2/Cadm1" "Epha4/Nin/Ntn1/Shtn1/Cnr1/Crocc/Ccp110/Magi2/Vldlr/Sirt1/Abl2/Ccdc88a/Scn1b/Ephb2/Ttbk2" "Epha4/Mapk6/Kif1a/Ptprs/Srgap2/Prmt3/Tanc2/Ephb2" "Ntn1/Myh10/Kif1a/Sirt1/Tacc2" ...
##  $ Count         : int  13 15 8 5 11 7 10 10 3 4 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722

5.3.0.3 Type 1c

results_type1c
## $chart_up

## 
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:41] "12309" "243813" "73102" "74442" "333050" "243537" "117600" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...241 enriched terms found
## 'data.frame':    241 obs. of  12 variables:
##  $ ID            : chr  "GO:0006816" "GO:0003015" "GO:0043271" "GO:1903522" ...
##  $ Description   : chr  "calcium ion transport" "heart process" "negative regulation of monoatomic ion transport" "regulation of blood circulation" ...
##  $ GeneRatio     : chr  "8/38" "6/38" "5/38" "6/38" ...
##  $ BgRatio       : chr  "473/28928" "256/28928" "141/28928" "279/28928" ...
##  $ RichFactor    : num  0.0169 0.0234 0.0355 0.0215 0.0157 ...
##  $ FoldEnrichment: num  12.9 17.8 27 16.4 11.9 ...
##  $ zScore        : num  9.44 9.82 11.22 9.36 8.44 ...
##  $ pvalue        : num  1.53e-07 9.86e-07 1.13e-06 1.62e-06 1.67e-06 ...
##  $ p.adjust      : num  0.000239 0.000517 0.000517 0.000517 0.000517 ...
##  $ qvalue        : num  0.00013 0.000281 0.000281 0.000281 0.000281 ...
##  $ geneID        : chr  "Cacna1d/Nos1/Ramp3/Bcl2/Atp2b4/Ptgs2/Usp2/Camk2d" "Cacna1d/Nos1/Ramp3/Atp2b4/Camk2d/Avpr1a" "Nos1/Bcl2/Ptgs2/Usp2/Camk2d" "Itga4/Cacna1d/Nos1/Atp2b4/Ptgs2/Avpr1a" ...
##  $ Count         : int  8 6 5 6 7 6 4 6 3 4 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722

5.3.0.4 Type 2a

results_type2a
## $chart_up

## 
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:44] "19142" "57764" "20315" "19419" "22242" "432530" "19649" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...97 enriched terms found
## 'data.frame':    97 obs. of  12 variables:
##  $ ID            : chr  "GO:0050678" "GO:0001667" "GO:1905330" "GO:0016055" ...
##  $ Description   : chr  "regulation of epithelial cell proliferation" "ameboidal-type cell migration" "regulation of morphogenesis of an epithelium" "Wnt signaling pathway" ...
##  $ GeneRatio     : chr  "7/44" "6/44" "4/44" "7/44" ...
##  $ BgRatio       : chr  "432/28928" "278/28928" "75/28928" "467/28928" ...
##  $ RichFactor    : num  0.0162 0.0216 0.0533 0.015 0.022 ...
##  $ FoldEnrichment: num  10.65 14.19 35.06 9.85 14.48 ...
##  $ zScore        : num  7.89 8.62 11.53 7.53 7.96 ...
##  $ pvalue        : num  3.75e-06 3.88e-06 5.23e-06 6.25e-06 2.41e-05 ...
##  $ p.adjust      : num  0.00253 0.00253 0.00253 0.00253 0.00723 ...
##  $ qvalue        : num  0.00161 0.00161 0.00161 0.00161 0.00461 ...
##  $ geneID        : chr  "Cxcl12/Egf/Ccnd1/Sulf2/Gas1/Tacstd2/Bmp4" "Cxcl12/Fermt1/Amotl2/Tacstd2/Bmp4/Kank1" "Ntn4/Egf/Tacstd2/Bmp4" "Egf/Ccnd1/Sulf2/Fermt1/Amotl2/Sostdc1/Kank1" ...
##  $ Count         : int  7 6 4 7 5 5 5 5 5 4 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722

5.3.0.5 Type 2b

results_type2b
## $chart_up

## 
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:81] "330428" "16513" "15220" "16373" "53315" "268379" "26944" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...116 enriched terms found
## 'data.frame':    116 obs. of  12 variables:
##  $ ID            : chr  "GO:1905330" "GO:0072006" "GO:0072080" "GO:0001822" ...
##  $ Description   : chr  "regulation of morphogenesis of an epithelium" "nephron development" "nephron tubule development" "kidney development" ...
##  $ GeneRatio     : chr  "6/79" "7/79" "6/79" "9/79" ...
##  $ BgRatio       : chr  "75/28928" "180/28928" "113/28928" "375/28928" ...
##  $ RichFactor    : num  0.08 0.0389 0.0531 0.024 0.0504 ...
##  $ FoldEnrichment: num  29.29 14.24 19.44 8.79 18.46 ...
##  $ zScore        : num  12.84 9.32 10.28 7.94 9.99 ...
##  $ pvalue        : num  5.93e-08 6.38e-07 6.85e-07 8.69e-07 9.29e-07 ...
##  $ p.adjust      : num  0.000116 0.000337 0.000337 0.000337 0.000337 ...
##  $ qvalue        : num  0.000083 0.000241 0.000241 0.000241 0.000241 ...
##  $ geneID        : chr  "Tacstd2/Egf/Alox12/Bmp4/Sall1/Ntn4" "Irx3/Jag1/Tacstd2/Umod/Sulf2/Bmp4/Sall1" "Irx3/Jag1/Tacstd2/Umod/Bmp4/Sall1" "Irx3/Jag1/Tacstd2/Umod/Sulf2/Bmp4/Sall1/Lrrk2/Glis2" ...
##  $ Count         : int  6 7 6 9 6 5 9 6 9 3 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722

5.3.0.6 Type 3a

results_type3a
## $chart_up

## 
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:37] "14281" "15511" "193740" "16476" "16477" "16007" "12702" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...243 enriched terms found
## 'data.frame':    243 obs. of  12 variables:
##  $ ID            : chr  "GO:0061077" "GO:0051085" "GO:0051084" "GO:0006458" ...
##  $ Description   : chr  "chaperone-mediated protein folding" "chaperone cofactor-dependent protein refolding" "'de novo' post-translational protein folding" "'de novo' protein folding" ...
##  $ GeneRatio     : chr  "6/36" "5/36" "5/36" "5/36" ...
##  $ BgRatio       : chr  "69/28928" "33/28928" "44/28928" "46/28928" ...
##  $ RichFactor    : num  0.087 0.152 0.114 0.109 0.167 ...
##  $ FoldEnrichment: num  69.9 121.8 91.3 87.3 133.9 ...
##  $ zScore        : num  20.2 24.5 21.2 20.7 23 ...
##  $ pvalue        : num  2.71e-10 5.17e-10 2.34e-09 2.95e-09 2.11e-08 ...
##  $ p.adjust      : num  3.61e-07 3.61e-07 1.03e-06 1.03e-06 5.89e-06 ...
##  $ qvalue        : num  2.14e-07 2.14e-07 6.12e-07 6.12e-07 3.50e-06 ...
##  $ geneID        : chr  "Hspa1b/Hspa1a/Hspb1/Dnajb4/Dnajb1/Hspa8" "Hspa1b/Hspa1a/Dnajb4/Dnajb1/Hspa8" "Hspa1b/Hspa1a/Dnajb4/Dnajb1/Hspa8" "Hspa1b/Hspa1a/Dnajb4/Dnajb1/Hspa8" ...
##  $ Count         : int  6 5 5 5 4 8 6 5 5 7 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722

5.3.0.7 Type 3b

results_type3b
## $chart_up

## 
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:166] "13654" "13653" "11910" "14282" "12702" "16598" "14825" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...760 enriched terms found
## 'data.frame':    760 obs. of  12 variables:
##  $ ID            : chr  "GO:0042326" "GO:0010563" "GO:0045936" "GO:0001933" ...
##  $ Description   : chr  "negative regulation of phosphorylation" "negative regulation of phosphorus metabolic process" "negative regulation of phosphate metabolic process" "negative regulation of protein phosphorylation" ...
##  $ GeneRatio     : chr  "20/157" "21/157" "21/157" "18/157" ...
##  $ BgRatio       : chr  "337/28928" "388/28928" "388/28928" "316/28928" ...
##  $ RichFactor    : num  0.0593 0.0541 0.0541 0.057 0.0504 ...
##  $ FoldEnrichment: num  10.94 9.97 9.97 10.5 9.29 ...
##  $ zScore        : num  13.6 13.1 13.1 12.5 11.6 ...
##  $ pvalue        : num  2.74e-15 3.30e-15 3.30e-15 1.47e-13 1.16e-12 ...
##  $ p.adjust      : num  3.19e-12 3.19e-12 3.19e-12 1.06e-10 6.71e-10 ...
##  $ qvalue        : num  2.12e-12 2.12e-12 2.12e-12 7.05e-11 4.45e-10 ...
##  $ geneID        : chr  "Socs3/Dusp1/Gadd45b/Jun/Irf1/Ppp1r15a/Gadd45g/Hspb1/Socs1/Cdkn1a/Errfi1/Trib1/Irs2/Gadd45a/Rgs2/Spry1/Dnaja1/Cd"| __truncated__ "Socs3/Dusp1/Gadd45b/Jun/Irf1/Ppp1r15a/Gadd45g/Hspb1/Socs1/Cdkn1a/Errfi1/Trib1/Irs2/Gadd45a/Rgs2/Spry1/Dnaja1/Cd"| __truncated__ "Socs3/Dusp1/Gadd45b/Jun/Irf1/Ppp1r15a/Gadd45g/Hspb1/Socs1/Cdkn1a/Errfi1/Trib1/Irs2/Gadd45a/Rgs2/Spry1/Dnaja1/Cd"| __truncated__ "Socs3/Dusp1/Gadd45b/Jun/Irf1/Ppp1r15a/Gadd45g/Hspb1/Socs1/Cdkn1a/Errfi1/Trib1/Gadd45a/Rgs2/Spry1/Dnaja1/Taf7/Impact" ...
##  $ Count         : int  20 21 21 18 18 19 14 15 13 17 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722

5.3.1 Saving Excel File for Subtype Pathways

Assign your markers as a vector genes_to_plot <- c( “Fos”, “Socs3”, “Gadd45b”, “Wee1”, “Hspa1a”, “Sat1”, “Cxcl12”, “Itpr2”, “Bmp4”, “Casr”, “Grin2c”, “Irx3”, “Rap1gap”, “App”, “Wwc1”, “Syt5”, “Syn3”, “Cacna1d”, “Slc6a7”, “Robo2”, “Begain”, “Pappa1”, “Nos1”, “Ptgs2”, “Bmp2”, “Atp2a3”

6 Comparison to Article + Database

## Warning: The following requested variables were not found: Pappa1

FeaturePlots

6.1 Figure C

These are the genes plotted on figure C on the article

Figure C
Figure C

6.1.1 MD 3

Each gene seems to be consistent with this macula dataset except Ccn1

6.1.2 MD 5

MD 5 genes shown in this dataset

6.1.3 MD ALL

All of these genes are enriched in every cluster

6.2 PartekFlow

6.2.1 Type 3 Markers

6.2.2 Type 2 Markers

6.2.3 Type 1 Markers

6.2.4 Type 1 Subcluster Possibility?

## Warning: The following requested variables were not found: Pappa1

## Warning: The following requested variables were not found: Pappa1

7 Save Annotated Seurat Object For ShinyCell

save(seu, file = here("jk_code", "macula_densa_final.rds"))